#Load Libraries and dataset
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
library(tidytuesdayR)

tuesdata <- tt_load(2024, week = 32)
## --- Compiling #TidyTuesday Information for 2024-08-06 ----
## --- There is 1 file available ---
## --- Starting Download ---
## 
##  Downloading file 1 of 1: `olympics.csv`
## --- Download complete ---
# Event-level results table from the TidyTuesday download.
oly <- tuesdata$olympics

# Default ggplot2 theme for every plot that does not set its own.
theme_set(theme_minimal())

# Distinct combinations of athlete, NOC and Games. `distinct()` keeps only the
# listed columns and returns an ungrouped tibble, so no stray grouping leaks
# into later pipelines.
df <- oly %>%
  distinct(id, name, noc, year, season, city)

Function to visualise participation in the Olympics

# Stacked-area chart of yearly participation for the countries (NOCs) with the
# most rows in the chosen season.
#
# data: data frame with `noc`, `year` and `season` columns. Every row is
#       counted once, so pass a de-duplicated table if one row per athlete
#       is wanted.
# seas: season to plot, "Summer" (the default) or "Winter".
#
# Returns a plotly figure.
vis <- function(data, seas = c("Summer", "Winter")) {
  # Resolve to exactly one season; comparing against the bare default vector
  # would recycle c("Summer", "Winter") across rows.
  seas <- match.arg(seas)

  # Restrict to the requested season once, so both the ranking and the plotted
  # counts describe the same Games. ungroup() makes the counts below
  # independent of any grouping the caller left on `data`.
  season_rows <- data %>%
    ungroup() %>%
    filter(season == seas)

  # Five NOCs with the most rows (ties at the cut-off are kept).
  top_noc <- season_rows %>%
    count(noc, name = "total_count") %>%
    slice_max(total_count, n = 5) %>%
    pull(noc)

  df <- season_rows %>%
    filter(noc %in% top_noc) %>%
    count(year, noc, name = "count") %>%
    arrange(year, noc)

  plot_ly(
    data = df,
    x = ~year,
    y = ~count,
    type = "scatter",
    color = ~noc,
    mode = "lines",
    stackgroup = "one",
    hoverinfo = "text",
    # Formula form so the text is evaluated against the plotted data.
    hovertext = ~paste0(
      "Country: ", noc,
      "<br>Count: ", count,
      "<br>Year:", year
    )
  ) %>%
    layout(
      title = paste0("Top 5 country participation - ", seas, " Olympics"),
      yaxis = list(title = "Total Athletes"),
      xaxis = list(title = "Year"),
      legend = list(title = list(text = "<b>Country</b>"))
    )
}

Winter Olympics visual

# Use the de-duplicated table: `oly` has one row per athlete per event, which
# would count multi-event athletes several times under "Total Athletes".
vis(df, "Winter")
## Selecting by total_count
# top_noc <- oly %>% filter(season == 'Winter') %>% 
#   group_by(noc) %>% 
#   summarise(total_count = n()) %>% 
#   ungroup() %>% 
#   top_n(5) %>% pull(noc)
# 
# oly %>%
#   filter(noc %in% top_noc) %>% 
#     group_by(year, noc) %>% 
#     summarise(count = n(), .groups = 'drop') %>% 
#     ungroup() %>% 
#     group_by(noc) %>% 
#     mutate(label = ifelse(row_number() == n(), paste0(noc, " - ", count), NA)) %>% 
#     ungroup() %>% 
#     arrange(year, noc) %>% 
#     ggplot(aes(x = year, y = count, fill = noc))+geom_area()+
#     #geom_line(alpha = 0.6, linewidth = 1) +
#     #geom_text(nudge_x = 3, size = 4)+
#     labs(title = paste0("Top 5 Country Participation - Olympics"))+
#     ylab("Number of athletes")+
#     theme_bw()+
#     theme(legend.position = 'bottom',
#           legend.title = element_blank())

Summer Olympics visual

# Use the de-duplicated table: `oly` has one row per athlete per event, which
# would count multi-event athletes several times under "Total Athletes".
vis(df, "Summer")
## Selecting by total_count

Box plot of athletes’ height or weight by sport

Use the function to visualise height or weight difference over the years in the Olympics.

# Per-Games box plots of athlete height or weight for one sport and sex, with
# a loess trend line over the years.
#
# NOTE: this name masks graphics::boxplot() once defined; it is kept so that
# existing calls continue to work.
#
# sports: value of the `sport` column to keep, e.g. "Basketball".
# gen:    "Male" or "Female" (the `sex` column is recoded from "M"; any other
#         non-missing value becomes "Female").
# metric: which measurement to plot, "height" (the default) or "weight".
# data:   results table with `sport`, `sex`, `year` and the chosen metric
#         column; defaults to the global `oly`.
#
# Returns a ggplot object.
boxplot <- function(sports, gen, metric = c("height", "weight"), data = oly) {
  # Resolve to a single choice; `if (metric == "height")` on the unresolved
  # length-2 default is an error in R >= 4.2.
  metric <- match.arg(metric)

  # Axis label and title noun for the chosen metric.
  labels <- switch(metric,
    height = list(axis = "Height (cm)", noun = "Heights"),
    weight = list(axis = "Weight (kg)", noun = "Weights")
  )

  data %>%
    mutate(sex = ifelse(sex == "M", "Male", "Female")) %>%
    filter(
      sport == sports,
      sex == gen,
      # Drop missing measurements up front instead of letting each ggplot
      # stat remove them with a warning.
      !is.na(.data[[metric]])
    ) %>%
    ggplot(aes(x = year, y = .data[[metric]])) +
    geom_boxplot(aes(group = factor(year))) +
    geom_smooth(method = "loess", formula = "y~x", se = FALSE) +
    scale_x_continuous(breaks = 193:201 * 10) +
    labs(
      x = NULL,
      y = labels$axis,
      title = paste0(
        labels$noun, " of ", gen, " ", sports, " athletes in the Olympics"
      ),
      caption = "Source: Kaggle Olympics history data"
    ) +
    theme_bw()
}
# Example: height distribution of male basketball players by Games year.
boxplot(sports = "Basketball", gen = "Male", metric = "height")
## Warning: Removed 650 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 650 rows containing non-finite outside the scale range
## (`stat_smooth()`).

# Example: weight distribution of female gymnasts by Games year.
boxplot(sports = "Gymnastics", gen = "Female", metric = "weight")
## Warning: Removed 1552 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1552 rows containing non-finite outside the scale range
## (`stat_smooth()`).

# Example: height distribution of male swimmers by Games year.
boxplot(sports = "Swimming", gen = "Male", metric = "height")
## Warning: Removed 2767 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 2767 rows containing non-finite outside the scale range
## (`stat_smooth()`).